#' ---
#' title: Reproduce Figure 1 (Twitter Sentiment Analysis Task)
#' date: 2024-10-01
#' version: 1.0
#' ---

library(tidyverse)
library(promptr)
library(sentimentr)
# Fix the RNG state up front (presumably so the jittered scatter points drawn
# later are reproducible between runs -- confirm).
set.seed(42)

## Load Data -------------------

# Hand-coded tweets ship with the promptr package; the three expert codes are
# averaged into a single expert score per tweet.
d <- mutate(
  promptr::scotus_tweets,
  expert_score = (expert1 + expert2 + expert3) / 3
)

# NOTE(review): load() restores objects under whatever names they were saved
# with. Later sections rely on `scotus_tweets` (with token1:token5 and
# prob1:prob5 columns), `out`, and `nb` being available, presumably from the
# files below -- confirm against the .RData contents.

# GPT-3 few-shot responses
load(file = "application1-few-shot-gpt-3.RData")

# GPT-4 few-shot responses
load(file = "application1-few-shot-gpt-4.RData")

# TweetNLP classifier output
tweet_nlp <- read_csv(file = "application1-TweetNLP.csv")

# Naive Bayes classifier output
load(file = "application1-naive-bayes.RData")

## Compute Scores -------------------------

### GPT-3 -----------------

# Reshape the five token columns and the five probability columns to long
# form, one row per (tweet, token slot).
# NOTE(review): `scotus_tweets` is referenced without a namespace here, so it
# is presumably the object restored by the GPT-3 load() call rather than the
# packaged dataset -- confirm it carries token1:token5 / prob1:prob5.
tokens <- scotus_tweets |>
  select(tweet_id, token1:token5) |>
  pivot_longer(cols = token1:token5, values_to = 'sentiment') |>
  select(-name)

probs <- scotus_tweets |>
  select(tweet_id, prob1:prob5) |>
  pivot_longer(cols = prob1:prob5, values_to = 'probability') |>
  select(-name)

# Both long tables are pivoted from the same rows in the same column order,
# so probabilities are attached by position. Token labels are then trimmed,
# title-cased, and the abbreviations 'Neg' / 'Pos' are spelled out.
tokens <- tokens |>
  mutate(
    probability = probs$probability,
    sentiment = str_to_title(str_trim(sentiment)),
    sentiment = case_when(
      sentiment == 'Neg' ~ 'Negative',
      sentiment == 'Pos' ~ 'Positive',
      TRUE ~ sentiment
    )
  )

# Total probability per tweet and sentiment label, keeping only the three
# recognised labels and spreading them into one column each (0 when a label
# never appeared for a tweet).
gpt_3_sentiment <- tokens |>
  group_by(tweet_id, sentiment) |>
  summarize(probability = sum(probability)) |>
  filter(sentiment %in% c('Positive', 'Negative', 'Neutral')) |>
  pivot_wider(
    names_from = 'sentiment',
    values_from = 'probability',
    values_fill = 0
  ) |>
  filter(!is.na(Negative)) |>
  ungroup()

# The GPT-3 score is the negated first principal component of the three
# label probabilities.
# NOTE(review): the negation presumably orients the score so that higher means
# more positive; this relies on princomp()'s default sign convention -- confirm.
p <- princomp(gpt_3_sentiment[c('Negative', 'Neutral', 'Positive')])

gpt_3_sentiment[['gpt_3_score']] <- -p$scores[, 1]

d <- d |>
  left_join(gpt_3_sentiment, by = 'tweet_id')


### GPT-4 ----------------

# For every element of `out`, normalise the token labels (trim + lower-case),
# total the probability assigned to each of the three labels, and score the
# response as positive minus negative probability.
# NOTE(review): the result is assigned to `d` by position, so `out` is assumed
# to hold one element per row of `d`, in the same order -- confirm.
d$gpt_4_score <- out |>
  lapply(\(response) {
    response |>
      mutate(token = str_to_lower(str_trim(token))) |>
      summarize(
        negative = sum(probability[token == 'negative']),
        neutral = sum(probability[token == 'neutral']),
        positive = sum(probability[token == 'positive'])
      ) |>
      summarize(score = positive - negative)
  }) |>
  unlist()

### TweetNLP ------------

# Collapse the three TweetNLP class columns into one continuous measure: the
# negated first principal component.
# NOTE(review): as with the GPT-3 score, the negation presumably makes higher
# values mean "more positive" under princomp()'s default sign convention --
# confirm.
p <- princomp(tweet_nlp[c('negative', 'neutral', 'positive')])

tweet_nlp[['tweetnlp_sentiment']] <- -p$scores[, 1]

# Attach the continuous TweetNLP measure to the main table by tweet id.
d <- d |>
  left_join(
    select(tweet_nlp, tweet_id, tweetnlp_sentiment),
    by = 'tweet_id'
  )

### Naive Bayes -------------

# Attach the Naive Bayes `prob_positive` column to the main table by tweet id.
d <- left_join(
  d,
  select(nb, tweet_id, prob_positive),
  by = 'tweet_id'
)

### Dictionary Classification --------------------

# Clean the raw tweet text, in this order:
#   1. collapse every run of punctuation and/or spaces into a single space
#   2. delete digits
#   3. drop any bytes that are not valid UTF-8
#   4. strip leading and trailing whitespace
tweets <- d$text |>
  gsub(pattern = '[[:punct:] ]+', replacement = ' ') |>
  gsub(pattern = '[[:digit:]]+', replacement = '') |>
  iconv(from = "UTF-8", to = "UTF-8", sub = '') |>
  str_trim()

# Dictionary-based score from sentimentr.
# NOTE(review): the assignment is positional, so this assumes sentiment()
# returns exactly one row per tweet (punctuation has already been stripped
# above) -- confirm.
d[['dictionary_sentiment']] <- sentimentr::sentiment(tweets)[['sentiment']]


## Build Subplots ------------------------

# Correlation between the hand-coded expert score and one automated score
# column of `data`, using cor()'s default method and only the rows where both
# values are present.
#
# data:      table containing `expert_score` and the column named `score_col`
# score_col: name (string) of the automated score column
expert_correlation <- function(data, score_col) {
  cor(data[[score_col]],
      data$expert_score,
      use = 'pairwise.complete.obs')
}

# Build one panel of the figure: a jittered scatter of an automated score
# against the expert score, with a linear fit and the correlation in the title.
#
# data:          table containing `expert_score` and the column `score_col`
# score_col:     name (string) of the column plotted on the y axis
# panel_label:   method name shown at the start of the title
# y_label:       y-axis label
# rho:           correlation shown in the title (rounded to 2 decimals)
# point_alpha:   transparency of the points
# jitter_height: vertical jitter; NULL keeps geom_jitter()'s default, 0 turns
#                vertical jitter off
build_subplot <- function(data, score_col, panel_label, y_label, rho,
                          point_alpha = 0.6, jitter_height = NULL) {
  ggplot(data = data,
         mapping = aes(x = expert_score, y = .data[[score_col]])) +
    geom_jitter(width = 0.1, height = jitter_height, alpha = point_alpha) +
    labs(x = 'Hand-Coded Sentiment Score',
         y = y_label,
         title = paste0(panel_label,
                        ' (\U03C1 = ',
                        round(rho, 2),
                        ')')) +
    theme_bw() +
    geom_smooth(method = 'lm', se = FALSE, color = 'gray')
}

# NOTE(review): the panels differ slightly in point alpha (0.7 vs 0.6) and in
# whether vertical jitter is disabled; those per-panel settings are kept
# exactly as they were so the rendered figure does not change.

### GPT-3 -------------------
correlation <- expert_correlation(d, 'gpt_3_score')

gpt3_subplot <- build_subplot(d, 'gpt_3_score',
                              panel_label = 'GPT-3',
                              y_label = 'GPT-3 Positivity',
                              rho = correlation,
                              point_alpha = 0.7)

### GPT-4 ------------------
correlation <- expert_correlation(d, 'gpt_4_score')

gpt4_subplot <- build_subplot(d, 'gpt_4_score',
                              panel_label = 'GPT-4',
                              y_label = 'GPT-4 Sentiment Score',
                              rho = correlation,
                              point_alpha = 0.7)

### TweetNLP ------------------
tweetnlp_correlation <- expert_correlation(d, 'tweetnlp_sentiment')

tweetnlp_subplot <- build_subplot(d, 'tweetnlp_sentiment',
                                  panel_label = 'TweetNLP',
                                  y_label = 'TweetNLP Positivity',
                                  rho = tweetnlp_correlation,
                                  jitter_height = 0)

### Naive Bayes ----------------------------

nb_correlation <- expert_correlation(d, 'prob_positive')

nb_subplot <- build_subplot(d, 'prob_positive',
                            panel_label = 'Naive Bayes',
                            y_label = 'NB Probability Positive',
                            rho = nb_correlation,
                            jitter_height = 0)

### Dictionary Classification ---------------------------

# dictionary correlation with expert codes
dictionary_correlation <- expert_correlation(d, 'dictionary_sentiment')

# plot against expert score
dictionary_subplot <- build_subplot(d, 'dictionary_sentiment',
                                    panel_label = 'Dictionary Classification',
                                    y_label = 'Dictionary Positivity Score',
                                    rho = dictionary_correlation)

## Combine Subplots -----------------------

library(patchwork)

# Lay the five panels out as three rows of two; the last row is padded with an
# empty spacer so the dictionary panel keeps the same width as the others.
top_row <- gpt3_subplot + gpt4_subplot
middle_row <- tweetnlp_subplot + nb_subplot
bottom_row <- dictionary_subplot + patchwork::plot_spacer()

p <- top_row / middle_row / bottom_row

# Write the combined figure to disk.
ggsave(
  filename = 'figure1.png',
  plot = p,
  width = 10,
  height = 12
)
